'''
@Company: TWL
@Author: xue jian
@Email: xuejian@kanzhun.com
@Date: 2020-04-01 15:15:03
'''
# Usage: nohup python -u cut_cascade_data.py <date> > result &
import random, os
import sys
def cut_data(num, path, dates, write_path, lines_per_chunk=10000):
    """Split each date's input file into ``num`` shard files.

    For every ``date`` in ``dates`` the file ``path + date`` is read line by
    line in binary mode. Consecutive runs of ``lines_per_chunk`` lines are
    dealt out round-robin: the first run goes to shard 0, the next to shard 1,
    and so on, wrapping back to shard 0 after shard ``num - 1``. Shard ``i``
    is written to ``write_path + date + "/" + str(i)``; existing shard files
    are overwritten.

    Args:
        num: Number of shard files to produce per date. Expected to be a
            positive integer.
        path: Prefix of the input files. It is concatenated directly with the
            date, so it must already end with a path separator if it names a
            directory.
        dates: Iterable of date strings; each one is both the input file name
            and the name of the output directory.
        write_path: Prefix of the output directories, concatenated directly
            with the date in the same way as ``path``.
        lines_per_chunk: Number of consecutive lines written to one shard
            before moving on to the next one.

    Raises:
        ValueError: If ``lines_per_chunk`` is smaller than 1.
        OSError: If an input file cannot be opened or an output directory or
            file cannot be created.
    """
    from contextlib import ExitStack

    if lines_per_chunk < 1:
        raise ValueError("lines_per_chunk must be a positive integer")

    for date in dates:
        print(date)
        out_dir = write_path + date
        # ExitStack closes the input and every shard file when the block is
        # left, including when an error interrupts the copy.
        with ExitStack() as stack:
            # Open the input first so that a missing input file does not
            # leave an empty output directory and empty shards behind.
            source = stack.enter_context(open(path + date, 'rb'))
            # exist_ok avoids the check-then-create race and also creates
            # missing parent directories.
            os.makedirs(out_dir, exist_ok=True)
            shards = [
                stack.enter_context(open(out_dir + "/" + str(i), 'wb'))
                for i in range(num)
            ]
            for count, line in enumerate(source):
                shards[count // lines_per_chunk % num].write(line)

# Dates to process; each date names one input file and one output directory.
# The commented-out code that used to sit here hard-coded batches of dates
# (2020-03-01..2020-03-31, 2020-04-01..2020-04-06, 2020-04-10..2020-04-30 and
# 2020-05-01..2020-05-07). The date is taken from the first command-line
# argument instead.
if len(sys.argv) < 2:
    # Exit with a usage hint instead of an IndexError traceback.
    sys.exit("usage: python {} <date, e.g. 2020-04-01>".format(sys.argv[0]))
dates = [sys.argv[1]]

print(dates)
# Split the day's file into 6 shards.
cut_data(6, "/data3/training_data/galaxy_data_new/", dates, "/data3/training_data/galaxy_data_cut/")